From: Jonathan Dieter Date: Sun, 29 Apr 2018 18:15:24 +0000 (+0300) Subject: Revamp file structure one last (hopefully) time so we can check the header X-Git-Tag: archive/raspbian/1.1.9+ds1-1+rpi1~1^2~299 X-Git-Url: https://dgit.raspbian.org/%22http://www.example.com/cgi/%22/%22http:/www.example.com/cgi/%22?a=commitdiff_plain;h=054e890cc39e437858f3e3a257289f897b37ee53;p=zchunk.git Revamp file structure one last (hopefully) time so we can check the header checksum without reading the whole header into memory in one go Signed-off-by: Jonathan Dieter --- diff --git a/src/lib/compint.c b/src/lib/compint.c index 4ee6bd8..89b9c7a 100644 --- a/src/lib/compint.c +++ b/src/lib/compint.c @@ -42,7 +42,8 @@ void compint_from_size(char *compint, size_t val, size_t *length) { return; } -int compint_to_size(size_t *val, const char *compint, size_t *length) { +int compint_to_size(size_t *val, const char *compint, size_t *length, + size_t max_length) { *val = 0; size_t old_val = 0; const unsigned char *i = (unsigned char *)compint; @@ -64,8 +65,12 @@ int compint_to_size(size_t *val, const char *compint, size_t *length) { break; i++; /* Make sure we're not overflowing and fail if we do */ - if(count > MAX_COMP_SIZE || *val < old_val) { - zck_log(ZCK_LOG_ERROR, "Number too large\n"); + if(count > MAX_COMP_SIZE || count+*length > max_length || + *val < old_val) { + if(count > max_length) + zck_log(ZCK_LOG_ERROR, "Read past end of header\n"); + else + zck_log(ZCK_LOG_ERROR, "Number too large\n"); *length -= count; *val = 0; return False; @@ -85,9 +90,10 @@ int compint_from_int(char *compint, int val, size_t *length) { return True; } -int compint_to_int(int *val, const char *compint, size_t *length) { +int compint_to_int(int *val, const char *compint, size_t *length, + size_t max_length) { size_t new = (size_t)*val; - if(!compint_to_size(&new, compint, length)) + if(!compint_to_size(&new, compint, length, max_length)) return False; *val = (int)new; if(*val < 0) { diff --git 
a/src/lib/dl/dl.c b/src/lib/dl/dl.c index b7f6616..77cf781 100644 --- a/src/lib/dl/dl.c +++ b/src/lib/dl/dl.c @@ -443,50 +443,48 @@ int PUBLIC zck_dl_get_header(zckCtx *zck, zckDL *dl, char *url) { zck->fd = dl->dst_fd; /* Download first hundred bytes and read magic and hash type */ - if(!zck_dl_bytes(dl, url, 100, start, &buffer_len)) + if(!zck_dl_bytes(dl, url, 200, start, &buffer_len)) return False; - if(!zck_read_initial(zck)) + if(!read_lead_1(zck)) return False; start = tell_data(dl->dst_fd); - /* If we haven't downloaded enough for the index hash plus a few others, do - * it now */ - if(!zck_dl_bytes(dl, url, zck->hash_type.digest_size+start+MAX_COMP_SIZE*4, + if(!zck_dl_bytes(dl, url, zck->lead_size + zck->hash_type.digest_size, start, &buffer_len)) return False; - /* Read and store the index hash */ - if(!zck_read_header_hash(zck)) + if(!read_lead_2(zck)) return False; - start += zck->hash_type.digest_size; zck_log(ZCK_LOG_DEBUG, "Header hash: (%s)", zck_hash_name_from_type(zck_get_full_hash_type(zck))); char *digest = zck_get_header_digest(zck); zck_log(ZCK_LOG_DEBUG, "%s\n", digest); free(digest); + start = tell_data(dl->dst_fd); - /* Read and store compression type and index size */ - if(!zck_read_ct_is(zck)) + /* If we haven't downloaded enough for the index hash plus a few others, do + * it now */ + if(!zck_dl_bytes(dl, url, zck->lead_size + zck->header_length, + start, &buffer_len)) return False; - start = tell_data(dl->dst_fd); - zck_log(ZCK_LOG_DEBUG, "Index size: %llu\n", zck->index_size); - /* Download and read rest of index */ - if(!zck_dl_bytes(dl, url, zck->index_size, start, - &buffer_len)) + /* Verify header checksum */ + if(!validate_header(zck)) return False; - if(!zck_header_hash(zck)) + zck_hash_close(&(zck->check_full_hash)); + + /* Read the header */ + if(!read_preface(zck)) return False; - if(!zck_read_index(zck)) + start += zck->preface_size; + zck_log(ZCK_LOG_DEBUG, "Index size: %llu\n", zck->index_size); + + /* Read the index */ + 
if(!read_index(zck)) return False; /* Read signatures */ - if(!zck_read_sig(zck)) + if(!read_sig(zck)) return False; - if(!close_read_header(zck)) - return False; - if(!zck_validate_header(zck)) - return False; - zck_hash_close(&(zck->check_full_hash)); /* Write zeros to rest of file */ zckIndex *info = &(dl->info.index); diff --git a/src/lib/header.c b/src/lib/header.c index 7e158fa..2b96aa5 100644 --- a/src/lib/header.c +++ b/src/lib/header.c @@ -32,6 +32,8 @@ #include "zck_private.h" +#define MAX_HEADER_IN_MEM 10*1024*1024 + #define VALIDATE(f) if(!f) { \ zck_log(ZCK_LOG_ERROR, \ "zckCtx not initialized\n"); \ @@ -52,12 +54,16 @@ return False; \ } -int check_flags(zckCtx *zck, char *header, size_t *length) { - zck->has_streams = header[8] & 0x01; +int check_flags(zckCtx *zck, char *header, size_t *length, size_t max_length) { + if(max_length < 4) { + zck_log(ZCK_LOG_ERROR, "Read past end of header\n"); + return False; + } + zck->has_streams = header[3] & 0x01; if(zck->has_streams) zck_log(ZCK_LOG_INFO, "Archive has streams\n"); - if((header[8] & 0xfe) != 0 || header[7] != 0 || header[6] != 0 || - header[5] != 0) { + if((header[3] & 0xfe) != 0 || header[2] != 0 || header[1] != 0 || + header[0] != 0) { zck_log(ZCK_LOG_ERROR, "Unknown flags(s) set\n"); return False; } @@ -65,115 +71,198 @@ int check_flags(zckCtx *zck, char *header, size_t *length) { return True; } -int add_to_header_string(zckCtx *zck, char *data, size_t length) { - VALIDATE(zck); - - zck->header_string = realloc(zck->header_string, zck->header_size + length); - if(zck->header_string == NULL) { - zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", - zck->header_size + length); - return False; - } - memcpy(zck->header_string + zck->header_size, data, length); - zck->header_size += length; - return True; -} +int read_lead_1(zckCtx *zck) { + VALIDATE_READ(zck); -int add_to_sig_string(zckCtx *zck, char *data, size_t length) { - VALIDATE(zck); + int lead = 5 + 2*MAX_COMP_SIZE; - zck->sig_string = 
realloc(zck->sig_string, zck->sig_size + length); - if(zck->sig_string == NULL) { - zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", - zck->sig_size + length); + char *header = zmalloc(lead); + if(header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", lead); return False; } - memcpy(zck->sig_string + zck->sig_size, data, length); - zck->sig_size += length; - return True; -} - -int zck_read_initial(zckCtx *zck) { - VALIDATE_READ(zck); - - char *header = NULL; size_t length = 0; - zck_log(ZCK_LOG_DEBUG, "Reading magic, flags and hash type\n"); - if(read_header(zck, &header, 9 + MAX_COMP_SIZE) < 9 + MAX_COMP_SIZE) + if(read_data(zck->fd, header, lead) < lead) return False; if(memcmp(header, "\0ZCK1", 5) != 0) { free(header); zck_log(ZCK_LOG_ERROR, - "Invalid header, perhaps this is not a zck file?\n"); + "Invalid lead, perhaps this is not a zck file?\n"); return False; } length += 5; - if(!check_flags(zck, header, &length)) - return False; + /* Read hash type for header and full digest and initialize check hash */ int hash_type = 0; - if(!compint_to_int(&hash_type, header+length, &length)) + if(!compint_to_int(&hash_type, header+length, &length, lead)) + return False; + if(zck->prep_hash_type > -1 && zck->prep_hash_type != hash_type) { + zck_log(ZCK_LOG_ERROR, + "Hash type (%i) doesn't match requested hash type " + "(%i)\n", hash_type, zck->prep_hash_type); return False; + } if(!zck_hash_setup(&(zck->hash_type), hash_type)) return False; + zck_log(ZCK_LOG_DEBUG, "Setting header and full digest hash type to %s\n", + zck_hash_name_from_type(hash_type)); - /* Return any unused bytes from read_header */ - if(!read_header_unread(zck, 9 + MAX_COMP_SIZE - length)) + /* Read header size */ + size_t header_length = 0; + if(!compint_to_size(&header_length, header+length, &length, lead)) return False; + if(zck->prep_hdr_size > -1 && (size_t)zck->prep_hdr_size != header_length) { + zck_log(ZCK_LOG_ERROR, + "Header length (%lu) doesn't match requested 
header length " + "(%lu)\n", header_length, zck->prep_hdr_size); + return False; + } + zck->header_length = header_length; - return add_to_header_string(zck, header, length); + zck->header = header; + zck->header_size = lead; + zck->lead_string = header; + zck->lead_size = length; + zck->hdr_digest_loc = length; + return True; } -int zck_read_header_hash(zckCtx *zck) { +int read_lead_2(zckCtx *zck) { VALIDATE_READ(zck); - if(zck->header_string == NULL) { + if(zck->lead_string == NULL || zck->lead_size == 0) { zck_log(ZCK_LOG_ERROR, - "Reading index hash before initial bytes are read\n"); + "Reading lead step 2 before lead step 1 is read\n"); return False; } - char *header = NULL; + char *header = zck->lead_string; + size_t length = zck->lead_size; + size_t lead = zck->header_size; - char *digest = zmalloc(zck->hash_type.digest_size); - if(digest == NULL) { + /* Read header digest */ + zck_log(ZCK_LOG_DEBUG, "Reading header digest\n"); + header = realloc(header, length + zck->hash_type.digest_size); + if(header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to re-allocate %lu bytes\n", + length + zck->hash_type.digest_size); + return False; + } + size_t to_read = 0; + if(lead < length + zck->hash_type.digest_size) + to_read = length + zck->hash_type.digest_size - lead; + if(read_data(zck->fd, header + lead, to_read) < to_read) + return False; + lead += to_read; + + if(zck->prep_digest && + memcmp(zck->prep_digest, header + length, zck->hash_type.digest_size) != 0) { + zck_log(ZCK_LOG_ERROR, + "Header digest doesn't match requested header digest\n"); + return False; + } + zck->header_digest = zmalloc(zck->hash_type.digest_size); + if(zck->header_digest == NULL) { zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", zck->hash_type.digest_size); return False; } - zck_log(ZCK_LOG_DEBUG, "Reading header hash\n"); - if(read_header(zck, &header, zck->hash_type.digest_size) - < zck->hash_type.digest_size) { - free(digest); + memcpy(zck->header_digest, header + length, 
zck->hash_type.digest_size); + length += zck->hash_type.digest_size; + + /* Store pre-header */ + zck->header = header; + zck->header_size = lead; + zck->lead_string = header; + zck->lead_size = length; + zck_log(ZCK_LOG_DEBUG, "Parsed lead: %lu bytes\n", length); + return True; +} + +int validate_header(zckCtx *zck) { + if(zck->header_length > MAX_HEADER_IN_MEM) { + + } + + /* Allocate header and store any extra bytes at beginning of header */ + zck->header = realloc(zck->header, zck->lead_size + zck->header_length); + if(zck->header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to reallocate %lu bytes\n", + zck->lead_size + zck->header_length); + return False; + } + zck->lead_string = zck->header; + char *header = zck->header + zck->lead_size; + size_t loaded = 0; + + if(zck->header_length < zck->header_size - zck->lead_size) { + zck_log(ZCK_LOG_ERROR, "Header size is too small for actual data\n"); return False; } - memcpy(digest, header, zck->hash_type.digest_size); - zck->header_digest = digest; + if(zck->lead_size < zck->header_size) + loaded = zck->header_size - zck->lead_size; + + /* Read header from file */ + zck_log(ZCK_LOG_DEBUG, "Reading the rest of the header: %lu bytes\n", + zck->header_length); + if(loaded < zck->header_length) { + if(!read_data(zck->fd, header + loaded, zck->header_length - loaded)) + return False; + zck->header_size = zck->lead_size + zck->header_length; + } + + if(!zck_hash_init(&(zck->check_full_hash), &(zck->hash_type))) + return False; + if(!zck_hash_update(&(zck->check_full_hash), zck->header, + zck->hdr_digest_loc)) + return False; + if(!zck_hash_update(&(zck->check_full_hash), header, zck->header_length)) + return False; + if(!zck_validate_header(zck)) + return False; return True; } -int zck_read_ct_is(zckCtx *zck) { +int read_preface(zckCtx *zck) { VALIDATE_READ(zck); - if(zck->header_string == NULL) { + if(zck->header_digest == NULL) { zck_log(ZCK_LOG_ERROR, - "Reading compression type before hash type is read\n"); + "Reading 
preface before lead is read\n"); return False; } - char *header = NULL; + char *header = zck->header + zck->lead_size; size_t length = 0; + size_t max_length = zck->header_length; - zck_log(ZCK_LOG_DEBUG, "Reading compression type and index size\n"); - if(read_header(zck, &header, MAX_COMP_SIZE*2) < MAX_COMP_SIZE*2) + /* Read data digest */ + zck_log(ZCK_LOG_DEBUG, "Reading data digest\n"); + if(length + zck->hash_type.digest_size > max_length) { + zck_log(ZCK_LOG_ERROR, "Read past end of header\n"); return False; + } + zck->full_hash_digest = zmalloc(zck->hash_type.digest_size); + if(!zck->full_hash_digest) { + zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", + zck->hash_type.digest_size); + return False; + } + memcpy(zck->full_hash_digest, header+length, zck->hash_type.digest_size); + length += zck->hash_type.digest_size; + /* Read flags */ + if(!check_flags(zck, header+length, &length, max_length-length)) + return False; + + /* Setup for reading compression type */ + zck_log(ZCK_LOG_DEBUG, "Reading compression type and index size\n"); int tmp = 0; /* Read and initialize compression type */ - if(!compint_to_int(&tmp, header, &length)) + if(!compint_to_int(&tmp, header+length, &length, max_length)) return False; if(!zck_set_ioption(zck, ZCK_COMP_TYPE, tmp)) return False; @@ -181,59 +270,56 @@ int zck_read_ct_is(zckCtx *zck) { return False; /* Read and initialize index size */ - if(!compint_to_int(&tmp, header + length, &length)) + if(!compint_to_int(&tmp, header+length, &length, max_length)) return False; zck->index_size = tmp; - /* Return any unused bytes from read_header */ - if(!read_header_unread(zck, MAX_COMP_SIZE*2 - length)) - return False; - - return add_to_header_string(zck, header, length); -} - -int zck_header_hash(zckCtx *zck) { - /* Calculate checksum to this point */ - if(!zck_hash_init(&(zck->check_full_hash), &(zck->hash_type))) - return False; - if(!zck_hash_update(&(zck->check_full_hash), zck->header_string, - zck->header_size)) - return 
False; + zck->preface_string = header; + zck->preface_size = length; return True; } -int zck_read_index(zckCtx *zck) { +int read_index(zckCtx *zck) { VALIDATE_READ(zck); + if(zck->preface_string == NULL) { + zck_log(ZCK_LOG_ERROR, + "Reading index before preface is read\n"); + return False; + } + char *header = NULL; zck_log(ZCK_LOG_DEBUG, "Reading index\n"); - if(!read_header(zck, &header, zck->index_size)) + if(zck->lead_size + zck->preface_size + zck->index_size > + zck->header_size) { + zck_log(ZCK_LOG_ERROR, "Read past end of header\n"); return False; - - if(!zck_index_read(zck, header, zck->index_size)) + } + header = zck->header + zck->lead_size + zck->preface_size; + int max_length = zck->header_size - (zck->lead_size + zck->preface_size); + if(!zck_index_read(zck, header, zck->index_size, max_length)) return False; + zck->index_string = header; return True; } -int zck_read_sig(zckCtx *zck) { +int read_sig(zckCtx *zck) { VALIDATE_READ(zck); - if(zck->header_string == NULL) { + if(zck->index_string == NULL) { zck_log(ZCK_LOG_ERROR, - "Reading signatures before hash type is read\n"); + "Reading signatures before index is read\n"); return False; } - char *header = NULL; + char *header = zck->header + zck->lead_size + zck->preface_size + + zck->index_size; + size_t max_length = zck->header_size - (zck->lead_size + zck->preface_size + + zck->index_size); size_t length = 0; - /* Get signature size */ - ssize_t rd = read_header(zck, &header, MAX_COMP_SIZE); - if(rd < 0) - return False; - - if(!compint_to_int(&(zck->sigs.count), header, &length)) + if(!compint_to_int(&(zck->sigs.count), header, &length, max_length)) return False; /* We don't actually support signatures yet, so bail if there is one */ @@ -243,75 +329,59 @@ int zck_read_sig(zckCtx *zck) { return False; } - if(!zck_hash_update(&(zck->check_full_hash), header, - length)) - return False; + /* Set data_offset */ + zck->data_offset = zck->lead_size + zck->header_length; - /* Return any unused bytes from 
read_header */ - if(!read_header_unread(zck, rd - length)) - return False; + if(zck->header_size > + zck->lead_size + zck->preface_size + zck->index_size + length) + zck_log(ZCK_LOG_WARNING, "There are %lu unused bytes in the header\n"); - zck->data_offset = zck->hdr_buf_size; - return add_to_sig_string(zck, header, length); + zck->sig_size = length; + zck->sig_string = header; + return True; } int zck_read_header(zckCtx *zck) { VALIDATE_READ(zck); - if(!zck_read_initial(zck)) - return False; - if(!zck_read_header_hash(zck)) + if(!read_lead_1(zck)) return False; - if(!zck_read_ct_is(zck)) + if(!read_lead_2(zck)) return False; - if(!zck_header_hash(zck)) + if(!validate_header(zck)) return False; - if(!zck_read_index(zck)) + if(!read_preface(zck)) return False; - if(!zck_read_sig(zck)) + if(!read_index(zck)) return False; - if(!close_read_header(zck)) - return False; - if(!zck_validate_header(zck)) + if(!read_sig(zck)) return False; if(!zck_import_dict(zck)) return False; return True; } -int zck_header_create(zckCtx *zck) { - int header_malloc = 9 + MAX_COMP_SIZE + zck->hash_type.digest_size + - MAX_COMP_SIZE*2; +int preface_create(zckCtx *zck) { + int header_malloc = zck->hash_type.digest_size + 4 + 2*MAX_COMP_SIZE; char *header = zmalloc(header_malloc); if(header == NULL) { zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", header_malloc); return False; } - size_t start = 0; size_t length = 0; - memcpy(header, "\0ZCK1", 5); - length += 5; - /* First three bytes of flags are always 0 */ + + /* Write out the full data digest */ + memcpy(header + length, zck->full_hash_digest, zck->hash_type.digest_size); + length += zck->hash_type.digest_size; + + /* Write out flags */ + memset(header + length, 0, 3); length += 3; /* Final byte for flags */ if(zck->has_streams) header[length] &= 1; length += 1; - compint_from_size(header+length, zck->hash_type.type, &length); - if(!add_to_header_string(zck, header, length)) { - free(header); - return False; - } - start = 
length; - - /* If we have the digest, write it in, otherwise write zeros */ - if(zck->header_digest) - memcpy(header+length, zck->header_digest, zck->hash_type.digest_size); - else - memset(header+length, 0, zck->hash_type.digest_size); - length += zck->hash_type.digest_size; - start = length; /* Write out compression type and index size */ if(!compint_from_int(header+length, zck->comp.type, &length)) { @@ -319,11 +389,6 @@ int zck_header_create(zckCtx *zck) { return False; } compint_from_size(header+length, zck->index_size, &length); - if(!add_to_header_string(zck, header+start, length-start)) { - free(header); - return False; - } - start = length; /* Shrink header to actual size */ header = realloc(header, length); @@ -331,14 +396,14 @@ int zck_header_create(zckCtx *zck) { zck_log(ZCK_LOG_ERROR, "Unable to reallocate %lu bytes\n", length); return False; } - if(zck->hdr_buf) - free(zck->hdr_buf); - zck->hdr_buf = header; - zck->hdr_buf_size = length; + + zck->preface_string = header; + zck->preface_size = length; + zck_log(ZCK_LOG_DEBUG, "Generated preface: %lu bytes\n", zck->preface_size); return True; } -int zck_sig_create(zckCtx *zck) { +int sig_create(zckCtx *zck) { char *header = zmalloc(MAX_COMP_SIZE); if(header == NULL) { zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", MAX_COMP_SIZE); @@ -358,22 +423,130 @@ int zck_sig_create(zckCtx *zck) { } zck->sig_string = header; zck->sig_size = length; + zck_log(ZCK_LOG_DEBUG, "Generated signatures: %lu bytes\n", zck->sig_size); return True; } -int zck_write_header(zckCtx *zck) { - VALIDATE_WRITE(zck); +int lead_create(zckCtx *zck) { + int phs = 5 + 2*MAX_COMP_SIZE + zck->hash_type.digest_size; + char *header = zmalloc(phs); + if(header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", phs); + return False; + } + size_t length = 0; + memcpy(header, "\0ZCK1", 5); + length += 5; + + /* Write out full data and header hash type */ + compint_from_size(header + length, zck->hash_type.type, 
&length); + /* Write out header length */ + zck->header_length = zck->preface_size + zck->index_size + zck->sig_size; + compint_from_size(header + length, zck->header_length, &length); + /* Skip header digest; we'll fill it in later */ + zck->hdr_digest_loc = length; + length += zck->hash_type.digest_size; - if(!write_data(zck->fd, zck->hdr_buf, zck->hdr_buf_size)) + header = realloc(header, length); + if(header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to reallocate %lu bytes\n", length); return False; + } + + zck->lead_string = header; + zck->lead_size = length; + zck_log(ZCK_LOG_DEBUG, "Generated lead: %lu bytes\n", zck->lead_size); return True; } +int zck_header_create(zckCtx *zck) { + /* Rebuild header without header hash */ + if(zck->header_digest) { + free(zck->header_digest); + zck->header_digest = NULL; + } + + /* Generate index */ + if(!index_create(zck)) + return False; -int zck_write_sigs(zckCtx *zck) { + /* Generate preface */ + if(!preface_create(zck)) + return False; + + /* Rebuild signatures */ + if(!sig_create(zck)) + return False; + + /* Rebuild pre-header */ + if(!lead_create(zck)) + return False; + + /* Calculate data offset */ + zck->data_offset = zck->lead_size + zck->preface_size + + zck->index_size + zck->sig_size; + + /* Merge everything into one large string */ + zck_log(ZCK_LOG_DEBUG, "Merging into header: %lu bytes\n", + zck->data_offset); + zck->header = zmalloc(zck->data_offset); + if(zck->header == NULL) { + zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", + zck->data_offset); + return False; + } + size_t offs = 0; + memcpy(zck->header + offs, zck->lead_string, zck->lead_size); + free(zck->lead_string); + zck->lead_string = zck->header + offs; + offs += zck->lead_size; + memcpy(zck->header + offs, zck->preface_string, zck->preface_size); + free(zck->preface_string); + zck->preface_string = zck->header + offs; + offs += zck->preface_size; + memcpy(zck->header + offs, zck->index_string, zck->index_size); + 
free(zck->index_string); + zck->index_string = zck->header + offs; + offs += zck->index_size; + memcpy(zck->header + offs, zck->sig_string, zck->sig_size); + free(zck->sig_string); + zck->sig_string = zck->header + offs; + zck->header_size = zck->data_offset; + + zckHash header_hash = {0}; + + /* Calculate hash of header */ + if(!zck_hash_init(&header_hash, &(zck->hash_type))) + return False; + zck_log(ZCK_LOG_DEBUG, "Hashing lead\n"); + /* Hash lead up to header digest */ + if(!zck_hash_update(&header_hash, zck->lead_string, + zck->hdr_digest_loc)) + return False; + zck_log(ZCK_LOG_DEBUG, "Hashing the rest\n"); + /* Hash rest of header */ + if(!zck_hash_update(&header_hash, zck->preface_string, zck->header_length)) + return False; + zck->header_digest = zck_hash_finalize(&header_hash); + if(zck->header_digest == NULL) { + zck_log(ZCK_LOG_ERROR, + "Unable to calculate %s checksum for index\n", + zck_hash_name_from_type(zck->hash_type.type)); + return False; + } + /* Write digest to header */ + memcpy(zck->lead_string+zck->hdr_digest_loc, zck->header_digest, + zck->hash_type.digest_size); + + return True; +} + +int zck_write_header(zckCtx *zck) { VALIDATE_WRITE(zck); - if(!write_data(zck->fd, zck->sig_string, zck->sig_size)) + zck_log(ZCK_LOG_DEBUG, "Writing header: %lu bytes\n", + zck->lead_size); + if(!write_data(zck->fd, zck->header, zck->header_size)) return False; return True; } diff --git a/src/lib/index/index_common.c b/src/lib/index/index_common.c index 0fc081e..0484cd9 100644 --- a/src/lib/index/index_common.c +++ b/src/lib/index/index_common.c @@ -80,26 +80,14 @@ void zck_index_free(zckCtx *zck) { free(zck->full_hash.ctx); zck->full_hash.ctx = NULL; } - if(zck->index_string) { - free(zck->index_string); - zck->index_string = NULL; - } - if(zck->header_string) { - free(zck->header_string); - zck->header_string = NULL; - } - zck->header_size = 0; - if(zck->sig_string) { - free(zck->sig_string); - zck->sig_string = NULL; - } + zck->lead_string = NULL; + 
zck->lead_size = 0; + zck->preface_string = NULL; + zck->preface_size = 0; + zck->index_string = NULL; + zck->index_size = 0; + zck->sig_string = NULL; zck->sig_size = 0; - if(zck->hdr_buf) { - free(zck->hdr_buf); - zck->hdr_buf = NULL; - } - zck->hdr_buf_read = 0; - zck->hdr_buf_size = 0; if(zck->header_digest) { free(zck->header_digest); zck->header_digest = NULL; diff --git a/src/lib/index/index_create.c b/src/lib/index/index_create.c index a09fe97..1070881 100644 --- a/src/lib/index/index_create.c +++ b/src/lib/index/index_create.c @@ -36,10 +36,9 @@ return False; \ } -int zck_index_finalize(zckCtx *zck) { +int index_create(zckCtx *zck) { VALIDATE(zck); - zckHash index_hash; char *index; size_t index_malloc = 0; size_t index_size = 0; @@ -49,8 +48,8 @@ int zck_index_finalize(zckCtx *zck) { if(zck->full_hash_digest == NULL) return False; - index_malloc = MAX_COMP_SIZE * 2; // Chunk hash type and # of index entries - index_malloc += zck->hash_type.digest_size; // Full hash digest + /* Set initial malloc size */ + index_malloc = MAX_COMP_SIZE * 2; /* Add digest size + MAX_COMP_SIZE bytes for length of each entry in * index */ @@ -66,8 +65,6 @@ int zck_index_finalize(zckCtx *zck) { index = zmalloc(index_malloc); compint_from_size(index+index_size, zck->index.hash_type, &index_size); compint_from_size(index+index_size, zck->index.count, &index_size); - memcpy(index+index_size, zck->full_hash_digest, zck->hash_type.digest_size); - index_size += zck->hash_type.digest_size; if(zck->index.first) { zckIndexItem *tmp = zck->index.first; while(tmp) { @@ -91,48 +88,7 @@ int zck_index_finalize(zckCtx *zck) { } zck->index_string = index; zck->index_size = index_size; - - /* Rebuild header without index hash */ - if(zck->header_digest) { - free(zck->header_digest); - zck->header_digest = NULL; - } - if(!zck_header_create(zck)) - return False; - - /* Rebuild signatures */ - if(!zck_sig_create(zck)) - return False; - - /* Calculate hash of header */ - 
if(!zck_hash_init(&index_hash, &(zck->hash_type))) { - free(index); - return False; - } - if(!zck_hash_update(&index_hash, zck->header_string, zck->header_size)) { - free(index); - return False; - } - if(!zck_hash_update(&index_hash, zck->index_string, zck->index_size)) { - free(index); - return False; - } - if(!zck_hash_update(&index_hash, zck->sig_string, zck->sig_size)) { - free(index); - return False; - } - zck->header_digest = zck_hash_finalize(&index_hash); - if(zck->header_digest == NULL) { - zck_log(ZCK_LOG_ERROR, - "Unable to calculate %s checksum for index\n", - zck_hash_name_from_type(zck->hash_type.type)); - return False; - } - - /* Rebuild header string with calculated index hash */ - if(!zck_header_create(zck)) - return False; - + zck_log(ZCK_LOG_DEBUG, "Generated index: %lu bytes\n", zck->index_size); return True; } @@ -255,7 +211,3 @@ int zck_index_finish_chunk(zckCtx *zck) { zck_hash_close(&(zck->work_index_hash)); return True; } - -int zck_write_index(zckCtx *zck) { - return write_data(zck->fd, zck->index_string, zck->index_size); -} diff --git a/src/lib/index/index_read.c b/src/lib/index/index_read.c index d50b337..e600f97 100644 --- a/src/lib/index/index_read.c +++ b/src/lib/index/index_read.c @@ -32,45 +32,31 @@ #include "zck_private.h" -int zck_index_read(zckCtx *zck, char *data, size_t size) { +int zck_index_read(zckCtx *zck, char *data, size_t size, size_t max_length) { size_t length = 0; - /* Add index to checksum */ - if(!zck_hash_update(&(zck->check_full_hash), data, size)) - return False; - - /* Make sure there's at least enough data for full digest and index count */ - if(size < zck->hash_type.digest_size + MAX_COMP_SIZE*2) { - zck_log(ZCK_LOG_ERROR, "Index is too small to read\n"); - return False; - } /* Read and configure hash type */ int hash_type; - if(!compint_to_int(&hash_type, data + length, &length)) + if(!compint_to_int(&hash_type, data + length, &length, max_length)) return False; if(!zck_set_ioption(zck, ZCK_HASH_CHUNK_TYPE, 
hash_type)) return False; /* Read number of index entries */ size_t index_count; - if(!compint_to_size(&index_count, data + length, &length)) + if(!compint_to_size(&index_count, data + length, &length, max_length)) return False; zck->index.count = index_count; - /* Read full data hash */ - zck->full_hash_digest = zmalloc(zck->hash_type.digest_size); - if(!zck->full_hash_digest) { - zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", - zck->hash_type.digest_size); - return False; - } - memcpy(zck->full_hash_digest, data + length, zck->hash_type.digest_size); - length += zck->hash_type.digest_size; - zckIndexItem *prev = zck->index.first; size_t idx_loc = 0; while(length < size) { + if(length + zck->index.digest_size > max_length) { + zck_log(ZCK_LOG_ERROR, "Read past end of header\n"); + return False; + } + zckIndexItem *new = zmalloc(sizeof(zckIndexItem)); if(!new) { zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", @@ -91,14 +77,14 @@ int zck_index_read(zckCtx *zck, char *data, size_t size) { /* Read and store entry length */ size_t chunk_length = 0; - if(!compint_to_size(&chunk_length, data+length, &length)) + if(!compint_to_size(&chunk_length, data+length, &length, max_length)) return False; new->start = idx_loc; new->comp_length = chunk_length; /* Read and store uncompressed entry length */ chunk_length = 0; - if(!compint_to_size(&chunk_length, data+length, &length)) + if(!compint_to_size(&chunk_length, data+length, &length, max_length)) return False; new->length = chunk_length; diff --git a/src/lib/io.c b/src/lib/io.c index f841cf9..e9a70e6 100644 --- a/src/lib/io.c +++ b/src/lib/io.c @@ -33,57 +33,6 @@ #include "zck_private.h" -int read_header_unread(zckCtx *zck, size_t length) { - if(zck->hdr_buf_size < length) { - zck_log(ZCK_LOG_ERROR, - "Attempting to unread %lu bytes while only %lu were read\n", - length, zck->hdr_buf_size); - return False; - } - zck->hdr_buf_size -= length; - return True; -} - -ssize_t read_header(zckCtx *zck, char **data, 
size_t length) { - while(zck->hdr_buf_size + length > zck->hdr_buf_read) { - zck->hdr_buf = realloc(zck->hdr_buf, zck->hdr_buf_size + length); - if(zck->hdr_buf == NULL) { - zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", - zck->hdr_buf_size + length); - return -1; - } - ssize_t rd = read_data(zck->fd, zck->hdr_buf + zck->hdr_buf_read, - zck->hdr_buf_size + length - zck->hdr_buf_read); - if(rd < 0) - return -1; - zck->hdr_buf_read = zck->hdr_buf_read + rd; - length = zck->hdr_buf_read - zck->hdr_buf_size; - } - *data = zck->hdr_buf + zck->hdr_buf_size; - zck->hdr_buf_size += length; - return length; -} - -int close_read_header(zckCtx *zck) { - if(zck->hdr_buf_read > zck->hdr_buf_size) { - zck->comp.data = zmalloc(zck->hdr_buf_read - zck->hdr_buf_size); - if(zck->comp.data == NULL) { - zck_log(ZCK_LOG_ERROR, "Unable to allocate %lu bytes\n", - zck->hdr_buf_read - zck->hdr_buf_size); - return False; - } - memcpy(zck->comp.data, zck->hdr_buf + zck->hdr_buf_size, - zck->hdr_buf_read - zck->hdr_buf_size); - zck->comp.data_size = zck->hdr_buf_read - zck->hdr_buf_size; - zck->comp.data_loc = zck->comp.data_size; - } - free(zck->hdr_buf); - zck->hdr_buf = NULL; - zck->hdr_buf_read = 0; - zck->hdr_buf_size = 0; - return True; -} - ssize_t read_data(int fd, char *data, size_t length) { if(length == 0) return 0; @@ -134,7 +83,7 @@ int read_comp_size(int fd, size_t *val, size_t *length) { *val = 0; return False; } - return !compint_to_size(val, data, length); + return !compint_to_size(val, data, length, MAX_COMP_SIZE); } int seek_data(int fd, off_t offset, int whence) { diff --git a/src/lib/zck.c b/src/lib/zck.c index 67f5930..8a92345 100644 --- a/src/lib/zck.c +++ b/src/lib/zck.c @@ -87,17 +87,10 @@ int PUBLIC zck_close(zckCtx *zck) { if(zck->mode == ZCK_MODE_WRITE) { if(zck_end_chunk(zck) < 0) return False; - if(!zck_index_finalize(zck)) + if(!zck_header_create(zck)) return False; - zck_log(ZCK_LOG_DEBUG, "Writing header\n"); if(!zck_write_header(zck)) return False; 
- zck_log(ZCK_LOG_DEBUG, "Writing index\n"); - if(!zck_write_index(zck)) - return False; - zck_log(ZCK_LOG_DEBUG, "Writing signatures\n"); - if(!zck_write_sigs(zck)) - return False; zck_log(ZCK_LOG_DEBUG, "Writing chunks\n"); if(!chunks_from_temp(zck)) return False; @@ -129,6 +122,10 @@ void zck_clear(zckCtx *zck) { if(zck == NULL) return; zck_index_free(zck); + if(zck->header) + free(zck->header); + zck->header = NULL; + zck->header_size = 0; if(!zck_comp_close(zck)) zck_log(ZCK_LOG_WARNING, "Unable to close compression\n"); zck_hash_close(&(zck->full_hash)); @@ -164,6 +161,8 @@ zckCtx PUBLIC *zck_create() { sizeof(zckCtx)); return False; } + zck->prep_hash_type = -1; + zck->prep_hdr_size = -1; return zck; } diff --git a/src/lib/zck_private.h b/src/lib/zck_private.h index f12fd01..c29a94f 100644 --- a/src/lib/zck_private.h +++ b/src/lib/zck_private.h @@ -115,16 +115,26 @@ typedef struct zckCtx { char *full_hash_digest; char *header_digest; - char *hdr_buf; - size_t hdr_buf_size; - size_t hdr_buf_read; - char *header_string; + size_t data_offset; + size_t header_length; + + char *header; size_t header_size; - char *sig_string; - size_t sig_size; + size_t hdr_digest_loc; + char *lead_string; + size_t lead_size; + char *preface_string; + size_t preface_size; char *index_string; size_t index_size; - size_t data_offset; + char *sig_string; + size_t sig_size; + + + char *prep_digest; + int prep_hash_type; + ssize_t prep_hdr_size; + zckIndex index; zckIndexItem *work_index_item; zckHash work_index_hash; @@ -180,9 +190,9 @@ int set_chunk_hash_type(zckCtx *zck, int hash_type) __attribute__ ((warn_unused_result)); /* index/index.c */ -int zck_index_read(zckCtx *zck, char *data, size_t size) +int zck_index_read(zckCtx *zck, char *data, size_t size, size_t max_length) __attribute__ ((warn_unused_result)); -int zck_index_finalize(zckCtx *zck) +int index_create(zckCtx *zck) __attribute__ ((warn_unused_result)); int zck_index_new_chunk(zckIndex *index, char *digest, int 
digest_size, size_t comp_size, size_t orig_size, int finished) @@ -215,25 +225,19 @@ int read_comp_size(int fd, size_t *val, size_t *length) __attribute__ ((warn_unused_result)); int chunks_from_temp(zckCtx *zck) __attribute__ ((warn_unused_result)); -ssize_t read_header(zckCtx *zck, char **data, size_t length) - __attribute__ ((warn_unused_result)); -int read_header_unread(zckCtx *zck, size_t length) - __attribute__ ((warn_unused_result)); -int close_read_header(zckCtx *zck) - __attribute__ ((warn_unused_result)); /* header.c */ -int zck_read_initial(zckCtx *zck) +int read_lead_1(zckCtx *zck) __attribute__ ((warn_unused_result)); -int zck_read_header_hash(zckCtx *zck) +int read_lead_2(zckCtx *zck) __attribute__ ((warn_unused_result)); -int zck_read_ct_is(zckCtx *zck) +int validate_header(zckCtx *zck) __attribute__ ((warn_unused_result)); -int zck_header_hash(zckCtx *zck) +int read_preface(zckCtx *zck) __attribute__ ((warn_unused_result)); -int zck_read_index(zckCtx *zck) +int read_index(zckCtx *zck) __attribute__ ((warn_unused_result)); -int zck_read_sig(zckCtx *zck) +int read_sig(zckCtx *zck) __attribute__ ((warn_unused_result)); int zck_read_header(zckCtx *zck) __attribute__ ((warn_unused_result)); @@ -282,9 +286,11 @@ int zck_dl_range_chk_chunk(zckDL *dl, char *url, int is_chunk) int compint_from_int(char *compint, int val, size_t *length) __attribute__ ((warn_unused_result)); void compint_from_size(char *compint, size_t val, size_t *length); -int compint_to_int(int *val, const char *compint, size_t *length) +int compint_to_int(int *val, const char *compint, size_t *length, + size_t max_length) __attribute__ ((warn_unused_result)); -int compint_to_size(size_t *val, const char *compint, size_t *length) +int compint_to_size(size_t *val, const char *compint, size_t *length, + size_t max_length) __attribute__ ((warn_unused_result)); diff --git a/test/empty.c b/test/empty.c index 23badd0..865f173 100644 --- a/test/empty.c +++ b/test/empty.c @@ -36,7 +36,7 @@ 
#include "zck_private.h" #include "util.h" -static char *checksum="42590fea6f55aca71e222ebe2d147ed89d17aac951a997fef8b980b2803a0412"; +static char *checksum="b09c9fd796877692c1ddf0a9a0201e9a901409a173061caceef80e7484a3ebca"; int main (int argc, char *argv[]) { /* Create empty zchunk file */ diff --git a/zchunk_format.txt b/zchunk_format.txt index dd12864..15b221a 100644 --- a/zchunk_format.txt +++ b/zchunk_format.txt @@ -1,19 +1,4 @@ -+-+-+-+-+-+-+-+-+-+====================+=================+ -| ID | Flags | Checksum type (ci) | Header checksum | -+-+-+-+-+-+-+-+-+-+====================+=================+ - -+========================+=================+=======+ -| Compression type (ci ) | Index size (ci) | Index | -+========================+=================+=======+ - -+======================+============+ -| Signature size (ci) | Signatures | -+======================+============+ - -+=================+===========+===========+ -| Compressed Dict | Chunk | Chunk | ==> More chunks -+=================+===========+===========+ - +Definitions: (ci) Compressed (unsigned) integer - An variable length little endian integer where the first seven bits of the number are stored in the @@ -22,15 +7,14 @@ the top bit of the final byte must be one, indicating the end of the number. -ID - '\0ZCK1', identifies file as zchunk version 1 file +The lead: -Flags - 32 bits for flags. All unused flags MUST be set to 0. If a decoder sees - a flag set that it doesn't recognize, it MUST exit with an error. 
Flags ++-+-+-+-+-+====================+=================+==================+ +| ID | Checksum type (ci) | Header checksum | Header size (ci) | ++-+-+-+-+-+====================+=================+==================+ - Current flags are: - bit 0: File has data streams +ID + '\0ZCK1', identifies file as zchunk version 1 file Checksum type This is an integer containing the type of checksum used to generate the @@ -44,6 +28,33 @@ Header checksum This is the checksum of everything from the beginning of the file until the end of the index, ignoring the header checksum. +Header size: + This is an integer containing the size of the header, not including the lead + + +The preface: + ++===============+-+-+-+-+========================+=================+=======+ +| Data checksum | Flags | Compression type (ci ) | Index size (ci) | Index | ++===============+-+-+-+-+========================+=================+=======+ + ++======================+============+ +| Signature count (ci) | Signatures | ++======================+============+ + +Data checksum + This is the checksum of everything after the index, including the + compressed dict and all the compressed chunks. This checksum is + generated using the overall checksum type, *not* the chunk checksum + type. + +Flags + 32 bits for flags. All unused flags MUST be set to 0. If a decoder sees + a flag set that it doesn't recognize, it MUST exit with an error. Flags + + Current flags are: + bit 0: File has data streams + Compression type This is an integer containing the type of compression used to compress dict and chunks. @@ -58,12 +69,20 @@ Index size Index This is the index, which is described in the next section. -Signature size - This is an integer countaining the size of the signature section. +Signature count + This is an integer containing the number of signatures. Signatures These are the signatures, described in a later section.
+ +The data: + ++=================+===========+===========+ +| Compressed Dict | Chunk | Chunk | ==> More chunks ++=================+===========+===========+ + + Compressed Dict (optional) This is a custom dictionary used when compressing each chunk. Because each chunk is compressed completely separately from the @@ -78,9 +97,9 @@ Chunk The index: -+==========================+==================+===============+ -| Chunk checksum type (ci) | Chunk count (ci) | Data checksum | -+==========================+==================+===============+ ++==========================+==================+ +| Chunk checksum type (ci) | Chunk count (ci) | ++==========================+==================+ +==================+===============+==================+ | Dict stream (ci) | Dict checksum | Dict length (ci) | @@ -109,12 +128,6 @@ Chunk checksum type Chunk count This is a count of the number of chunks in the zchunk file. -Checksum of all data - This is the checksum of everything after the index, including the - compressed dict and all the compressed chunks. This checksum is - generated using the overall checksum type, *not* the chunk checksum - type. - Dict stream If the data streams flag is set, this must always be 0, otherwise don't include this integer